##################################################
# Replication Code
# Taeyong Park and Andrew Reeves
# "Local Unemployment and Voting for President: Uncovering Causal Mechanisms"
# Summary: Data Setup for 2012 Analysis
##################################################


rm(list = ls())
library(foreign)
library(stringr)


#########################
#
# I. CREATE MERGED DATA #
#
#########################

#######################
# 1. Import Data Sets #
#######################

## CCES data
## load() restores the saved object(s) and returns their name(s); get() then
## fetches the object under that name.
## NOTE(review): presumes the .RData file holds a single data frame -- confirm.
data <- get(load("CCES12_Common_VV.RData"))

## Unemployment data from the Bureau of Labor Statistics website, https://download.bls.gov/pub/time.series/la/la.data.64.County (RECODED by the authors)
unempDataLAUS <- read.csv("LAUS_CountyData_Unemp1211.csv",
                          stringsAsFactors = FALSE)

## Gas price data from www.GasBuddy.com (RECODED by the authors)
## One Stata file per month; newcolName[j] is the name later given to the
## respondent-level price column built from gasData[[j]].
newcolName <- c("gasAug", "gasSep", "gasOct")
gasAugData <- read.dta("gas2012Aug.dta")
gasSepData <- read.dta("gas2012Sep.dta")
gasOctData <- read.dta("gas2012Oct.dta")
gasData <- list(gasAugData, gasSepData, gasOctData)

## Foreclosures data from www.realtytrac.com (RECODED by the authors)
forcData <- read.csv("foreclosure2012Reduced.csv", stringsAsFactors = FALSE)

## Income data from https://www.census.gov/did/www/saipe/data/interactive/saipe.html?s_appName=saipe&map_yearSelector=2014&map_geoSelector=aa_c&s_year=2015,2014,2013,2012,2011,2010,2009,2008,2007,2006,2005,2004&menu=grid_proxy&s_measures=mhi_snc (RECODED by the authors)
incomeData <- read.csv("IncomeData1211.csv", stringsAsFactors = FALSE)


#######################
# 2. Merge Data Sets #
#######################

# Unemployment
# Attach the 24 monthly county unemployment columns (2011 and 2012) of the
# LAUS table to every respondent, keyed on the county FIPS code.

# Codes made of exactly four digits get a leading "0". Assigning the padded
# strings also turns the whole fips column into character.
fourDigits <- which(str_detect(unempDataLAUS$fips, "^\\d{4}$")) # detect four digits
fiveDigits <- paste0("0", unempDataLAUS$fips[fourDigits])
unempDataLAUS$fips[fourDigits] <- fiveDigits

# Column layout of the LAUS table as it is used here: even columns 2..24 feed
# the 2012 months (Jan..Dec) and odd columns 3..25 feed the 2011 months.
unempMonths <- c("Jan", "Feb", "Mar", "Apr", "May", "June",
                 "July", "Aug", "Sep", "Oct", "Nov", "Dec")
unempNew <- c(paste0("unemp", unempMonths, "12"),
              paste0("unemp", unempMonths, "11"))
unempSrc <- c(seq(2, 24, by = 2), seq(3, 25, by = 2))

# Start every new column as all-missing; unmatched respondents stay NA.
for (unempName in unempNew) {
  data[[unempName]] <- rep(NA, nrow(data))
}

# For each respondent, locate the LAUS row carrying the same FIPS code.
# Matching against the reversed key vector selects the LAST row with that
# code, which is the row that would win if the table were applied row by row
# in order. Respondents with a missing county code are never matched.
lausRow <- nrow(unempDataLAUS) + 1L -
  match(data$countyfips_post, rev(unempDataLAUS$fips))
lausRow[is.na(data$countyfips_post)] <- NA
matched <- which(!is.na(lausRow))

# Copy each source column across in one vectorised assignment per column
# (replaces a loop over LAUS rows that touched all 24 columns per iteration).
for (k in seq_along(unempNew)) {
  data[[unempNew[k]]][matched] <- unempDataLAUS[[unempSrc[k]]][lausRow[matched]]
}

## Foreclosures
# Attach zip-level foreclosure totals and changes to every respondent, keyed
# on the respondent's lookupzip_post.

# Zip codes made of exactly three or four digits are left-padded with zeros to
# five characters. The assignment also turns the Zip column into character.
threeDigits <- which(str_detect(forcData$Zip, "^\\d{3}$")) # detect three digits
fiveDigits <- paste0("00", forcData$Zip[threeDigits])
forcData$Zip[threeDigits] <- fiveDigits
fourDigits <- which(str_detect(forcData$Zip, "^\\d{4}$")) # detect four digits
fiveDigits <- paste0("0", forcData$Zip[fourDigits])
forcData$Zip[fourDigits] <- fiveDigits

# Start the new columns as all-missing; unmatched respondents stay NA.
data$totForcAugNov <- rep(NA, nrow(data))
data$totForcAugOct <- rep(NA, nrow(data))
data$chgForcAugNov <- rep(NA, nrow(data))
data$chgForcAugOct <- rep(NA, nrow(data))

# Row of forcData to use for each respondent. Matching against the reversed
# zip vector picks the LAST row with that zip, i.e. the row that would win if
# the table were applied row by row in order. Respondents with a missing zip
# are never matched.
forcRow <- nrow(forcData) + 1L - match(data$lookupzip_post, rev(forcData$Zip))
forcRow[is.na(data$lookupzip_post)] <- NA
matched <- which(!is.na(forcRow))

# One vectorised copy per variable (replaces a loop over every forcData row).
data$totForcAugNov[matched] <- forcData$TotAugNov[forcRow[matched]]
data$totForcAugOct[matched] <- forcData$TotAugOct[forcRow[matched]]
data$chgForcAugNov[matched] <- forcData$ChgAugNov[forcRow[matched]]
data$chgForcAugOct[matched] <- forcData$ChgAugOct[forcRow[matched]]


# Gas
# For every monthly gas table, add one respondent-level price column named
# newcolName[j], keyed on the respondent's lookupzip_post.
for (j in seq_along(gasData)) {
  gasMonth <- gasData[[j]]

  # Zip codes made of exactly four digits get a leading "0".
  # NOTE(review): unlike the foreclosure zips, three-digit zips are not padded
  # here -- confirm this is intended.
  fourDigits <- which(str_detect(gasMonth$Zip, "^\\d{4}$")) # detect four digits
  fiveDigits <- paste0("0", gasMonth$Zip[fourDigits])
  gasMonth$Zip[fourDigits] <- fiveDigits
  gasData[[j]] <- gasMonth # keep the padded zips in gasData as well

  # Row of this month's table to use for each respondent. Matching against
  # the reversed zip vector picks the LAST row with that zip, i.e. the row
  # that would win if the table were applied row by row in order.
  # Respondents with a missing zip are never matched.
  gasRow <- nrow(gasMonth) + 1L - match(data$lookupzip_post, rev(gasMonth$Zip))
  gasRow[is.na(data$lookupzip_post)] <- NA
  matched <- which(!is.na(gasRow))

  gasPrice <- rep(NA, nrow(data))
  gasPrice[matched] <- gasMonth$Price[gasRow[matched]]

  # Add the column under its final name directly instead of appending a
  # temporary column and renaming whatever happens to be last in data.
  data[[newcolName[j]]] <- gasPrice
}


# Income
# Make the county code a string and give codes of exactly four digits a
# leading "0" so they can be compared with countyfips_post.
incomeData$fips <- as.character(incomeData$fips)
fourDigits <- which(str_detect(incomeData$fips, "^\\d{4}$")) # detect four digits
fiveDigits <- paste("0", incomeData$fips[fourDigits], sep = "")
incomeData$fips[fourDigits] <- fiveDigits

# Left join: every respondent row is kept, with NA income where no county
# matches.
dataMerged <- merge(
  x = data,
  y = incomeData,
  by.x = "countyfips_post",
  by.y = "fips",
  all.x = TRUE
)

# Number of respondents with missing inc11 after the merge.
sum(is.na(dataMerged$inc11))

write.csv(dataMerged, file = "cces12Merged.csv", row.names = FALSE)


###############################
#
# END - I. CREATE MERGED DATA #
#
###############################



###############################
#
# II. DATA SETUP & IMPUTATION #
#
###############################

rm(list = ls())
library(foreign)
library(Amelia)
library(stringr)
data <- read.csv("cces12Merged.csv", stringsAsFactors = FALSE)

## County ID
# Consecutive integer ID per distinct countyfips_post value, numbered in order
# of first appearance in the data. Rows with a missing county code get NA.
countyKey <- data$countyfips_post
J <- length(unique(countyKey))
data$countyID <- match(countyKey, unique(countyKey))
data$countyID[is.na(countyKey)] <- NA
table(data$countyID)

## State ID
# Same scheme, keyed on inputstate.
stateKey <- data$inputstate
L <- length(unique(stateKey))
data$stateID <- match(stateKey, unique(stateKey))
data$stateID[is.na(stateKey)] <- NA
table(data$stateID)


## Presidential vote
# The recodes below work on as.numeric(as.factor(x)), i.e. on the position of
# each response among the sorted distinct values of the column.
# NOTE(review): the code-to-label mapping therefore depends on the exact
# response labels in the file -- check against the printed table.
table(as.factor(data$CC410a)) # vote
voteCode <- as.numeric(as.factor(data$CC410a))
voteRecode <- c("1" = 1, "4" = 0) # level code -> pvote2; all others NA
data$pvote2 <- unname(voteRecode[as.character(voteCode)])
table(data$pvote2) # 1 = Obama; 0 = Romney --> Two party




## Evaluations of national economy

# Retrospective Eval
table(as.factor(data$CC302))
nateconCode <- as.numeric(as.factor(data$CC302))
# level code -> natecon5; any level code not listed becomes NA
nateconRecode <- c("2" = 1, "1" = 2, "6" = 3, "4" = 4, "3" = 5)
data$natecon5 <- unname(nateconRecode[as.character(nateconCode)])
table(data$natecon5) # 1=much better; ... 5=much worse



## COVARIATES

# Gender
table(as.factor(data$gender))
genderCode <- as.numeric(as.factor(data$gender))
# 1 where the gender level code is 1, 0 everywhere else (including missing)
data$female <- as.numeric(genderCode %in% 1) # no missing
table(data$female)

# Age
# Age in 2012, from year of birth.
data$age <- 2012 - data$birthyr
table(data$age)

# Race
table(as.factor(data$race))
raceCode <- as.numeric(as.factor(data$race))
raceNew <- rep(0, length(raceCode)) # every other level code
raceNew[which(raceCode == 2)] <- 1 # black
raceNew[which(raceCode == 3)] <- 2 # hispanic
raceNew[is.na(raceCode)] <- NA # missing stays missing
data$raceNew <- raceNew
table(data$raceNew)

# Employment status
table(as.factor(data$employ))
employCode <- as.numeric(as.factor(data$employ))
employment <- rep(0, length(employCode)) # every other level code
employment[which(employCode == 1)] <- 1 # full-time
employment[which(employCode == 4)] <- 2 # part-time
employment[which(employCode %in% c(8, 9))] <- 3 # unemployed
employment[is.na(employCode)] <- NA # missing stays missing
data$employment <- employment
table(data$employment) # 1=Full 2=part; 3=unemployed

# Income
table(data$faminc)
incomeCode <- as.numeric(as.factor(data$faminc))
# Position k of this vector holds the faminc level code that becomes income
# category k (k = 1..16). Level codes not listed here (and missing values)
# become NA.
incomeLevelOrder <- c(17, 1, 5, 9, 11, 12, 14, 15, 16, 2, 3, 4, 6, 7, 10, 13)
data$income <- as.numeric(match(incomeCode, incomeLevelOrder))
table(data$income)

# Education
table(as.factor(data$educ))
educCode <- as.numeric(as.factor(data$educ))
educNew <- educCode # level codes other than 1, 5, 6 are kept as they are
educNew[which(educCode == 5)] <- 0 # post-grad
educNew[which(educCode %in% c(1, 6))] <- 1 # someCollege
data$educNew <- educNew
table(data$educNew) # 2 = fourCollege; 3=HighSchool; 4=noHigh

# Own/Rent
table(as.factor(data$ownhome))
ownCode <- as.numeric(as.factor(data$ownhome))
ownHome <- rep(0, length(ownCode)) # any level code other than 2
ownHome[which(ownCode == 2)] <- 1
ownHome[is.na(ownCode)] <- NA # missing stays missing
data$ownHome <- ownHome
table(data$ownHome)

# Party ID
table(as.factor(data$pid3))
partyCode <- as.numeric(as.factor(data$pid3))
party3 <- partyCode # level codes 1 and 2 (and any above 5) are kept
party3[which(partyCode %in% c(3, 4))] <- 0
party3[which(partyCode == 5)] <- 3
data$party3 <- party3
table(data$party3) # 1=dem; 2=ind; 3=rep

# Ideology
table(as.factor(data$ideo5))
ideoCode <- as.numeric(as.factor(data$ideo5))
# level code -> ideol; level code 4 and anything else not listed become NA
ideoRecode <- c("5" = 1, "1" = 2, "3" = 3, "2" = 4, "6" = 5)
data$ideol <- unname(ideoRecode[as.character(ideoCode)])
table(data$ideol) # 1=very liberal ... 5=very conservative

# News interest
table(as.factor(data$newsint))
newsCode <- as.numeric(as.factor(data$newsint))
# level code -> newsInt; level code 1 and anything else not listed become NA
newsRecode <- c("2" = 1, "4" = 2, "5" = 3, "3" = 4)
data$newsInt <- unname(newsRecode[as.character(newsCode)])
table(data$newsInt)



### Create a data frame: df ###
# Variables carried into the analysis data set, in output order.
keepVars <- c("V101", "V103",
              "countyname",
              "pvote2", "natecon5", "stateID", "countyID",
              "totForcAugNov", "totForcAugOct", "chgForcAugNov", "chgForcAugOct",
              "gasAug", "gasSep", "gasOct", "gasNov",
              "unempJan11", "unempFeb11", "unempMar11",
              "unempApr11", "unempMay11",
              "unempJune11", "unempJuly11", "unempAug11",
              "unempSep11", "unempOct11", "unempNov11", "unempDec11",
              "unempJan12", "unempFeb12", "unempMar12",
              "unempApr12", "unempMay12",
              "unempJune12", "unempJuly12", "unempAug12",
              "unempSep12", "unempOct12", "unempNov12", "unempDec12",
              "inc12", "inc11",
              "age", "female", "raceNew",
              "employment",
              "income", "educNew",
              "ownHome", "party3", "ideol", "newsInt")

# Section I of this script only builds gasAug, gasSep and gasOct, so "gasNov"
# is absent from a merged file produced by it and selecting it would stop with
# "undefined columns selected". It is kept when the merged file does have it
# and dropped otherwise; every other variable is still required.
optionalVars <- "gasNov"
keepVars <- keepVars[!(keepVars %in% optionalVars) |
                       keepVars %in% colnames(data)]

df <- data[, keepVars]
colnames(df)[1:2] <- c("id", "weight")

# Keep respondents with a two-party vote. A logical mask is used because
# df[-which(...), ] would drop EVERY row when no pvote2 value is missing.
df <- df[!is.na(df$pvote2), ]

# Single imputation with a fixed seed. Column 3 (countyname) is left out of
# the imputation input; the listed variables are treated as ordinal.
# NOTE(review): id and weight are passed to amelia() as ordinary variables
# (no idvars argument) -- confirm this is intended.
set.seed(123)
data12Imputed <- amelia(df[, -3], m = 1, ords = c("natecon5",
                                                  "stateID", "countyID",
                                                  "age", "female", "raceNew",
                                                  "employment",
                                                  "income", "educNew",
                                                  "ownHome", "party3", "ideol",
                                                  "newsInt"))
write.amelia(data12Imputed, separate = TRUE, "data12Imputed", format = "dta")
#####################################
#
# END - II. DATA SETUP & IMPUTATION #
#
#####################################


